Fire Data: Preprocessing

Combining all California Campfire Data

In this section, we read and process all the fire data for the California Campfire week.

# Combine data from different days
filelist = list.files(path="../data/fire/", pattern = "*.kml")

#assuming tab separated values with a header    
datalist = lapply(filelist, function(x)st_read(paste("../data/fire/", x, sep="")))

#assuming the same header/columns for all files
fire <- do.call("rbind", datalist) 
# Combine data from different days
filelist = list.files(path="../data/fire/", pattern = "*.txt")

#assuming tab separated values with a header    
datalist = lapply(filelist, function(x)read.delim(paste("../data/fire/",x,sep=""), sep=",", strip.white=TRUE))

#assuming the same header/columns for all files
firetxt <- do.call("rbind", datalist) 

firetxt <- firetxt %>%
  rename(
    longitude = Lon,
    latitude = Lat,
    date = YearDay,
    time = Time, 
    satellite = Satellite,
    method_of_detect = Method.of.Detect,
    ecosys = Ecosys,
    frp = Fire.RadPower
  ) %>%
  mutate(
    date = as.Date(as.character(date),          
                   format = "%Y%j"),
    method_of_detect = as.factor(method_of_detect),
    satellite = as.factor(satellite),
    ecosys = as.factor(ecosys),
    time = substr(as.POSIXct(sprintf("%04.0f", time), format='%H%M'), 12, 16)
  )
# Add a new column for date and time
firetxt <- firetxt %>%
  mutate(
    date_comp = as.POSIXct(paste(date, time), 
                            format = "%Y-%m-%d %H:%M"),
    frp = na_if(frp, -999.000)
  )

We also use the California boundary file to restrict our view to only points in California.

cal_bound <- st_read("../ca-state-boundary/CA_State_TIGER2016.shp")

# Convert to the same coordinate system as HMS (4326)
cal_bound <- cal_bound %>%
  st_set_crs(3857) %>% 
  st_transform(4326)
# Get an array of whether each observation is in California
in_bound <- lengths(st_intersects(fire, cal_bound))>0
# Get subset with indexing
# https://gis.stackexchange.com/questions/394954/r-using-st-intersects-to-classify-points-inside-outside-and-within-a-buffer
in_cali <- fire[in_bound,]

Cleaning

in_cali <- as.data.frame(in_cali) %>%
  dplyr::select(-Name) %>%
  # Remove prefix
  mutate(Description = gsub('Fire Attributes: YearDay: ', '', Description)) %>%
  # Replace column name strings with comma
  mutate(Description = gsub('[a-zA-Z]*: ', ',', Description)) %>%
  # Separate based on comma
  separate(., col=Description, 
           into = c('date', 'time', 'satellite','method_of_detect'),
           sep = ',') 
in_cali <- in_cali %>% 
  # Remove unnecessary substrings
  mutate(satellite = (str_remove_all(satellite,"Method of"))) %>%
  # Convert into correct datetime
  mutate( time = substr(as.POSIXct(sprintf("%04.0f", as.integer(time)), format='%H%M'), 12, 16),
          start_time = as.POSIXct(paste(date, time),
                            format = "%Y%j %H:%M"),
          date = as.POSIXct(date, format = "%Y%j")) %>%
  dplyr::select(-time)

Smoke Data: Preprocessing & Visualization

# Read light, medium, heavy separately
# Combine data from different days
filelist = list.files(path="../data/smoke/", pattern = "*.kml")
filelist <- filelist[-1]

#assuming tab separated values with a header    
datalist = lapply(filelist, function(x)st_read(paste("../data/smoke/", x, sep=""), layer="Smoke (Light)"))

#assuming the same header/columns for all files
smoke_light <- do.call("rbind", datalist) 

# smoke_light <- st_read("../data/smoke/smoke20181108.kml", layer="Smoke (Light)")
smoke_light <- as.data.frame(smoke_light) %>%
  mutate(type="light")
#assuming tab separated values with a header    
datalist = lapply(filelist, function(x)st_read(paste("../data/smoke/", x, sep=""), layer="Smoke (Medium)"))
smoke_med <- do.call("rbind", datalist) 
# smoke_med <- st_read("../data/smoke/smoke20181108.kml", layer="Smoke (Medium)")
smoke_med <- as.data.frame(smoke_med) %>%
  mutate(type="medium")
datalist = lapply(filelist, function(x)st_read(paste("../data/smoke/", x, sep=""), layer="Smoke (Heavy)"))
smoke_heavy <- do.call("rbind", datalist) 
# smoke_heavy <- st_read("../data/smoke/smoke20181108.kml", layer="Smoke (Heavy)")
smoke_heavy <- as.data.frame(smoke_heavy) %>%
  mutate(type="heavy")

smoke <- list(smoke_light, smoke_med, smoke_heavy)

smoke <- smoke %>%
  reduce(full_join, by=c('Name', 'Description', 'type','geometry'))
smoke <- smoke %>%
  dplyr::select(-Name) %>%
  mutate(Description = gsub('Smoke Attributes: Start Time: ', '', Description)) %>%
  mutate(Description = gsub('[a-zA-Z]*: ', ',', Description)) %>%
  separate(., col=Description, 
           into = c('start_time', 'end_time', 'density', 'satellite'),
           sep = ',') 
# Switch off spherical geometry
sf_use_s2(FALSE)
# Clean time and add smoke area
smoke <- smoke %>% 
  # Remove unnecessary substrings
  mutate(start_time = (str_remove_all(start_time,"[a-zA-Z]")),
         end_time = (str_remove_all(end_time,"[a-zA-Z]"))) %>%
  # Separate date and time based on space
  separate(., col=start_time, 
           into=c('start_date', 'st'),
           sep=' ') %>%
  separate(., col=end_time, 
           into=c('end_date','et'),
           sep=' ') %>%
  # Convert into datetime object
  mutate( st = substr(as.POSIXct(sprintf("%04.0f", as.integer(st)), format='%H%M'), 12, 16),
          et = substr(as.POSIXct(sprintf("%04.0f", as.integer(et)), format='%H%M'), 12, 16),
          start_time = as.POSIXct(paste(start_date, st),
                            format = "%Y%j %H:%M"),
         end_time = as.POSIXct(paste(end_date, et),
                            format = "%Y%j %H:%M"),
         area = st_area(smoke$geometry)) %>%
  dplyr::select(-start_date, -end_date, -st, -et)

Mapping Data for the Week

HDBSCAN

First, we plot all the fire detection points in California.

Then, we plot it making adjustments to the coordinate system.

Using these coordinates, we input into HDBSCAN specifying minPts = 100.

We can plot the resulting clusters, excluding the outliers (belonging to cluster = 0).

The points can also be shaded based on their membership probability. The higher the probability, the more solid the color.